import pandas as pd
import json
from tqdm import tqdm


def read_excel(file_path: str, sheet_name="Sheet1") -> list:
    """Load test cases from an Excel sheet as a list of record dicts.

    Each spreadsheet row becomes one dict with the keys ``language``,
    ``scene``, ``input``, ``target`` and ``keyWords`` (in that order).
    The first three are read from the columns ``语言``, ``场景`` and
    ``用例名称``; ``target`` and ``keyWords`` are read from the columns of
    the same name when the sheet has them and default to ``''`` otherwise.

    Blank cells (read by pandas as NaN) are converted to ``''`` so that the
    records use one single "no value" marker and serialise to valid JSON
    (``json.dumps`` would otherwise emit the non-standard ``NaN`` token).

    Args:
        file_path: Path of the Excel workbook to read.
        sheet_name: Name or zero-based index of the single sheet to read.
            Defaults to ``"Sheet1"``.

    Returns:
        One dict per data row, in sheet order; ``[]`` for a sheet that has
        the header row but no data rows.

    Raises:
        KeyError: If any of the required columns is missing from the sheet.
    """
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    # (output key, source column), listed in the key order of the records.
    required = [('language', '语言'), ('scene', '场景'), ('input', '用例名称')]
    optional = [('target', 'target'), ('keyWords', 'keyWords')]

    # Fail once, up front, with the full list of missing columns instead of
    # an opaque per-row lookup error.
    missing = [column for _, column in required if column not in df.columns]
    if missing:
        raise KeyError(
            f"required column(s) {missing} not found in {file_path!r}"
        )

    def clean(value):
        # pandas represents an empty cell as NaN/None/NaT.
        return '' if pd.isna(value) else value

    # Build each output field column-wise; the presence of an optional
    # column is decided once here rather than once per row.
    fields = {}
    for key, column in required + optional:
        if column in df.columns:
            fields[key] = [clean(value) for value in df[column]]
        else:
            fields[key] = [''] * len(df)

    # Transpose the per-field lists back into one dict per row.
    return [dict(zip(fields, row)) for row in zip(*fields.values())]


def save(result, output):
    """Write ``result`` to ``output`` as one pretty-printed JSON document.

    The file is (re)created as UTF-8 text, indented by two spaces, with
    non-ASCII characters written as-is rather than escaped. A summary line
    with the number of items in ``result`` is printed after the dump.

    Args:
        result: JSON-serialisable collection to write; must support ``len``.
        output: Destination file path.
    """
    dump_options = {"indent": 2, "ensure_ascii": False}
    with open(output, mode="w", encoding="utf-8") as handle:
        json.dump(result, handle, **dump_options)
        total = len(result)
        print(f"✅ 已生成 {total}条 ")


def save_jsonl(result, output):
    """Write ``result`` to ``output`` in JSON Lines form.

    Every item of ``result`` is serialised on its own line (non-ASCII
    characters are kept unescaped) into a UTF-8 text file that is created
    or overwritten. Progress is shown with a tqdm bar while writing, and a
    summary line with the item count is printed once the file is closed.

    Args:
        result: Sequence of JSON-serialisable items; must support ``len``.
        output: Destination file path.
    """
    progress = tqdm(result, desc="swe-bench-verified 内容提取中...")
    with open(output, mode="w", encoding="utf-8") as handle:
        # Lazily encode one record per line as the progress bar advances.
        handle.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in progress
        )
    total = len(result)
    print(f"✅ 已生成 {total}条 ")


if __name__ == '__main__':
    # Script entry point: convert one fixed workbook into a records file
    # stored next to it.
    # NOTE(review): the destination has a ".json" extension but is written
    # by save_jsonl, i.e. it holds one JSON object per line.
    source_workbook = r"D:\JunTuan\yuanShao\0916-农商代码助手\rgzn_all_kk_2025022402.xlsx"
    destination = r"D:\JunTuan\yuanShao\0916-农商代码助手\rgzn_all_kk_2025022402.json"
    save_jsonl(read_excel(source_workbook), destination)
